Text analysis: title and abstract of male and female speakers
Title analysis
# Read the presentations table and derive the year of each presentation.
data <- read.table("data/presentations_PPGE_2008-2019.csv", sep = ",",
                   header = TRUE, as.is = TRUE)
data$date <- dmy(data$date)
data$year <- year(data$date)
# skimr::skim(data)

# Excluding special events as round tables and discussions not related to a
# project or study presented by someone.
IDs <- c(154, 250, 211, 289)
# Keep only regular presentations that have an English title.
data <- data %>%
  filter(!id %in% IDs, !is.na(title_english))

table(data$position_cat, data$gender)
##
## F M
## others 4 1
## postdoc 25 32
## professor 25 75
## student 88 75
Tidytext
# One row per title token, keeping the speaker metadata alongside each word.
tit <- dplyr::select(
  data,
  id, gender, position_cat, audience_n, title_english
)
text_tok <- unnest_tokens(tit, output = word, input = title_english)

# Stopwords: drop words that add no content, such as "and", "or", "the",
# "of", "in".
# English stopword list
stop_w <- tibble(word = stopwords(source = "stopwords-iso"))
# Remove the stopwords from the corpus
text <- anti_join(text_tok, stop_w, by = "word")
# Remove numbers, the dash and other unwanted tokens
remover <- c("ăŒ", "1", "1st", "2", "364", "40", "70", "750", "aff", "da")
text <- filter(text, !(word %in% remover))
# Resolve simple plurals by cutting the plural ending.
plural <- c("actions", "advances", "adaptations", "amphibians", "animals",
            "ants", "anurans",
            "applications", "approaches", "bees", "builds", "birds",
            "cerrados", "challenges",
            "continents", "crops",
            "decisions", "declines", "determines", "determinants", "defenses",
            "dynamics",
            "economics", "ecosystems", "environments", "experiences",
            "forests",
            "genetics", "gifts", "gradients", "guides", "impacts",
            "increases", "interactions", "lives",
            "landscapes", "males", "mammals", "mangroves", "models",
            "movements",
            "mutualisms", "networks", "neotropics",
            "opilions", "phenotypes", "plants", "projects", "paths",
            "perspectives",
            "populations", "promotes", "relationships", "relations",
            "resources", "responses", "roads", "services", "skulls", "snakes",
            "seeds",
            "spaces", "spiders", "stages", "trees", "variations",
            "threats")
# Compute the membership mask once instead of three times.
is_plural <- text$word %in% plural
plural_hits <- text$word[is_plural]
# Words in "-ches"/"-shes"/"-sses"/"-xes" lose "es" (approaches -> approach);
# dropping only the "s" would leave the non-word "approache". Everything else
# loses just the trailing "s".
takes_es <- grepl("(ch|sh|ss|x)es$", plural_hits)
text$word[is_plural] <- ifelse(
  takes_es,
  sub("es$", "", plural_hits),
  sub("s$", "", plural_hits)
)

# Grouping similar words
lemma <- rbind(c("adaptive", "adaptation"),
c("advancement", "advance"),
c("agricultural", "agriculture"),
c("agro", "agriculture" ),
c("amazonia","amazon" ),
c("amazonian","amazon" ),
c("andean","andes"),
c("apply","application"),
c("applying","application"),
c("apidae","apis"),
c("arachnida","arachnid"),
c("argue","argument"),
c("basal", "basis"),
c("behavioral","behavior"),
c("behavioural","behavior"),
c("bignonieae", "bignoniaceae"),
c("biological", "biology"),
c("brazilian","brazil"),
c("building","build"),
c("changing", "change"),
c("cnidarian", "cnidaria"),
c("coastal","coast"),
c("colour", "color"),
c("colors", "color"),
c("communities","community" ),
c("competitive", "competition"),
c("complexity", "complex"),
c("convergences", "convergence"),
c("convergent", "convergence"),
c("cordatus","cordata" ),
c("croplands","crop"),
c( "cultural", "culture"),
c("darwin's", "darwin"),
c("darwinian", "darwin"),
c("defensive", "defense"),
c("dependent","dependence"),
c("detecting","detection"),
c("determine", "determinant"),
c("developmental", "development"),
c("dispersers","dispersal"),
c("disturbed", "disturbance"),
c("diversification", "diversity"),
c("dragonflies", "dragonfly"),
c("drier", "drought"),
c("ecological", "ecology"),
c("ecologists", "ecology"),
c("endemic", "endemism"),
c("effectiveness", "efficiency"),
c("environmental", "environment"),
c("evolutionary", "evolution"),
c("expanding", "expansion"),
c("extinct", "extinction"),
c("facilitate", "facilitation"),
c("fisheries", "fishery"),
c("floral", "flora"),
c("floristic", "flora"),
c("forested", "forest"),
c("functional", "function"),
c("functionally", "function"),
c("functioning", "function"),
c("geographical", "geographic"),
c("heterogeneties", "heterogeneity"),
c("heterogeneous", "heterogeneity"),
c("histories", "history"),
c("integrated", "integration"),
c("intregating", "integration"),
c("integrative", "integration"),
c("invasive", "invasion"),
c("isotopic", "isotope"),
c("linking", "link"),
c("living", "live"),
c("mammalia", "mammal"),
c("managed", "manage"),
c("managers", "manage"),
c("mathematical", "mathematics"),
c("mates", "mating"),
c("mediated", "mediate"),
c("mechanistic", "mechanism"),
c("matrices", "matrix"),
c("migratory", "migration"),
c("mimicking", "mimicry"),
c("modeling", "model"),
c("mutualistic", "mutualism"),
c("natural", "nature"),
c("neotropical", "neotropic"),
c("northeastern", "northeast"),
c("occuring", "occur"),
c("onça", "onca"),
c("opiliones", "opilion"),
c("parasite", "parasitism"),
c("parent", "parenting"),
c("phylogenies", "phylogeny"),
c("phylogenetic", "phylogeny"),
c("phylogenomic", "phylogeny"),
c("pollinators", "pollination"),
c("protected", "protect"),
c("protective", "protect"),
c("rainfall", "rain"),
c("reconstructing", "reconstruction"),
c("regulatory", "regulation"),
c("regulates", "regulation"),
c("relation", "relationship"),
c("reproductive", "reproduction"),
c("restored", "restoration"),
c("robustness", "robust"),
c("scientific", "science"),
c("scientist", "science"),
c("sexy", "sexual"),
c("simulated", "simulation"),
c("societies", "society"),
c("social", "society"),
c("socio", "society"),
c("space", "spatial"),
c("spacio", "spatial"),
c("stabilize", "stability"),
c("stable", "stability"),
c("stories", "story"),
c("strategic", "strategy"),
c("strategies", "strategy"),
c("structured", "structure"),
c("structuring", "structure"),
c("studies", "study"),
c("studing", "study"),
c("sustainable", "sustainability"),
c("theories", "theory"),
c("theoretical", "theory"),
c("threatened", "threat"),
c("tropical", "tropic"),
c("vision", "visual")
)
# Apply the lemma table: every token equal to a first-column entry is replaced
# by the second-column entry of the same row.
# stringsAsFactors = FALSE keeps both columns character, so on R < 4.0 the
# assignment below does not write factor codes into text$word.
lemma <- as.data.frame(lemma, stringsAsFactors = FALSE)
# seq_len() gives an empty loop for a zero-row table, unlike 1:dim(lemma)[1].
for (i in seq_len(nrow(lemma))) {
  text$word[text$word == lemma[i, 1]] <- lemma[i, 2]
}

# Counting the words
# Frequency of every word in the cleaned corpus
pala <- count(text, word)

# Most common words (used more than 8 times), printed as a table
kable(filter(count(text, word, sort = TRUE), n > 8))

| word | n |
|---|---|
| ecology | 49 |
| forest | 42 |
| evolution | 32 |
| landscape | 27 |
| bird | 22 |
| model | 22 |
| diversity | 21 |
| environment | 21 |
| species | 21 |
| plant | 18 |
| structure | 17 |
| atlantic | 15 |
| brazil | 15 |
| effects | 15 |
| conservation | 14 |
| interaction | 13 |
| bee | 12 |
| community | 12 |
| network | 12 |
| patterns | 12 |
| sĂŁo | 12 |
| study | 12 |
| application | 11 |
| behavior | 11 |
| dynamic | 11 |
| ecosystem | 11 |
| paulo | 11 |
| population | 11 |
| role | 11 |
| change | 10 |
| male | 10 |
| mutualism | 10 |
| neotropic | 10 |
| pollination | 10 |
| science | 10 |
| sexual | 10 |
| animal | 9 |
| biology | 9 |
| care | 9 |
| cerrado | 9 |
| genetic | 9 |
| habitat | 9 |
| mating | 9 |
| opilion | 9 |
| selection | 9 |
| society | 9 |
# Relative frequency of each word within each gender, spread to one row per
# word with columns proportion_F, proportion_M, n_F and n_M.
props <- text %>%
  count(gender, word) %>%
  # filter(n > 1) %>% # dropping words used only once
  group_by(gender) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(names_from = gender, values_from = c(proportion, n))

library(scales)
# Words on the dashed line are used equally by both genders. The colour
# mapping must live inside aes(); passed to ggplot() directly it is ignored
# and scale_color_gradient() has nothing to act on.
ggplot(props, aes(x = proportion_M, y = proportion_F,
                  color = abs(proportion_F - proportion_M))) +
  geom_abline(color = "gray40", lty = 2) +
  geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
  scale_x_log10(labels = percent_format(), limits = c(0.0005, 0.03)) +
  scale_y_log10(labels = percent_format(), limits = c(0.0005, 0.03)) +
  scale_color_gradient(low = "blue", high = "red")

# Same comparison on the untransformed proportion scale
ggplot(props, aes(x = proportion_M, y = proportion_F)) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "gray40", lty = 2) +
  geom_text(aes(label = word), check_overlap = TRUE)

# Same comparison using raw counts
ggplot(props, aes(x = n_M, y = n_F)) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "gray40", lty = 2) +
  geom_text(aes(label = word), check_overlap = TRUE) +
  xlim(-1, 30) + ylim(-1, 30)

# Words used more than 8 times overall
seleciona <- pala %>%
  arrange(desc(n)) %>%
  filter(n > 8)
# Per-gender proportions restricted to the frequent words in `seleciona`.
props <- text %>%
  filter(word %in% seleciona$word) %>%
  count(gender, word) %>%
  group_by(gender) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(names_from = gender, values_from = c(proportion, n))

# Order the words by total count and mirror the female proportions to the
# negative side so both genders share one axis.
test <- props %>%
  arrange(desc(proportion_F), desc(proportion_M)) %>%
  mutate(ntot = n_F + n_M) %>%
  mutate(word = fct_reorder(word, ntot, max))
test$proportion_F <- test$proportion_F * -1
# Select by name rather than by position so the result does not depend on the
# column order produced by pivot_wider().
test <- test %>%
  dplyr::select(word, proportion_F, proportion_M) %>%
  pivot_longer(c(proportion_F, proportion_M),
               names_to = "gender", values_to = "proportion")

# "network" was misspelled as "nework" in the exclusion list, so that word
# was never dropped.
test %>%
  filter(!word %in% c("animal", "behavior", "opilion", "role",
                      "science", "sexual", "network")) %>%
  ggplot(aes(x = proportion, y = word, fill = gender)) +
  geom_col() + ylab("") + xlab("Proportion") +
  scale_fill_manual(name = "gender", values = c("#6D57CF", "#FCA532"),
                    labels = c("F", "M")) +
  geom_vline(xintercept = c(-0.1, -0.05, -0.02, 0, 0.02, 0.05, 0.10),
             linetype = "dotted",
             col = "darkgray") +
  # Label the mirrored negative side with positive values
  scale_x_continuous(breaks = c(-0.1, -0.05, 0, 0.05, 0.10),
                     labels = c(0.10, 0.05, 0, 0.05, 0.10))
ggsave("figures/title_wordFrequency.jpeg", units = "in", width = 7,
       height = 7, dpi = 300)

# Word cloud
# Word cloud of all title words
textplot_wordcloud(x = dfm(tokens(text$word)))

# Word clouds per gender (F purple, M orange).
# `color` is spelled out instead of relying on partial matching of `col`.
# NOTE(review): par(new = TRUE) draws the second cloud without advancing to
# the next mfrow panel -- confirm that overlaying is intended.
par(mfrow = c(1, 2))
textplot_wordcloud(x = dfm(tokens(text$word[text$gender == "F"])),
                   color = "#6D57CF")
par(new = TRUE)
textplot_wordcloud(x = dfm(tokens(text$word[text$gender == "M"])),
                   color = "#FCA532")

# TF-IDF
# tf-idf of each word, treating each gender as one document.
text_id <- text %>%
  count(gender, word) %>%
  bind_tf_idf(word, gender, n) %>%
  arrange(desc(tf_idf))
# text_id
text_id$word <- as.factor(text_id$word)

# Top words per gender by tf-idf (ties are kept). slice_max() replaces the
# superseded top_n(); the axis labels now match the axes (tf-idf is on x).
text_id %>%
  group_by(gender) %>%
  slice_max(tf_idf, n = 5) %>%
  ungroup() %>%
  ggplot(aes(x = tf_idf, y = reorder(word, tf_idf), fill = gender)) +
  geom_col(show.legend = FALSE) +
  labs(x = "tf-idf", y = NULL) +
  facet_wrap(~gender, scales = "free") +
  theme_minimal()

# N-grams
# Split every title into consecutive word pairs.
bigrams <- unnest_tokens(tit, bigram, title_english, token = "ngrams", n = 2)

# Excluding stopwords: keep both halves of the bigram as separate columns and
# drop any pair in which either half is a stopword.
bigrams <- separate(
  bigrams,
  col = bigram,
  into = c("word1", "word2"),
  sep = " ",
  remove = FALSE
)
bigrams_stop <- filter(
  bigrams,
  !(word1 %in% stop_words$word | word2 %in% stop_words$word)
)

count(bigrams_stop, gender, bigram, sort = TRUE)

# Bigrams used more than once; female counts are mirrored to the negative
# side for a back-to-back bar chart.
test <- bigrams_stop %>%
  count(gender, bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  mutate(n2 = n)
from_women <- test$gender == "F"
test$n2[from_women] <- test$n2[from_women] * -1
test$bigram <- fct_reorder(test$bigram, test$n2, min)
ggplot(test, aes(x = n2, y = fct_rev(bigram), fill = gender)) +
  geom_col()

# The same bigrams, one panel per gender
bigrams_stop %>%
  count(gender, bigram, sort = TRUE) %>%
  filter(n > 1) %>%
  ggplot(aes(x = reorder(bigram, n), y = n)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "frequency", title = "Most frequent bigrams") +
  coord_flip() +
  facet_wrap(~gender) +
  theme_minimal()

# To find bigrams that contain specific words, we can use filter():
# Bigrams containing "ecolog" (ecology, ecological, ...), grouped by gender
bieco <- bigrams_stop %>%
  group_by(gender) %>%
  filter(str_detect(bigram, "ecolog"))
# Count per gender and bigram, one column per gender
bieco %>%
  count(gender, bigram, sort = TRUE) %>%
  pivot_wider(names_from = gender, values_from = n)
## # A tibble: 35 × 3
## bigram M F
## <chr> <int> <int>
## 1 behavioral ecology 3 NA
## 2 ecological approach NA 2
## 3 landscape ecology 2 2
## 4 ecological interactions 2 NA
## 5 ecological niche 2 NA
## 6 ecological analysis NA 1
## 7 ecological context NA 1
## 8 ecological processes NA 1
## 9 ethno ecology NA 1
## 10 human ecology NA 1
## # … with 25 more rows
# Bigrams containing "evolu", one column per gender.
# NOTE(review): distinct() runs before count(), so every n is 1 and the table
# shows presence/absence rather than frequency -- confirm this is intended.
bigrams_stop %>%
  group_by(gender) %>%
  filter(str_detect(bigram, "evolu")) %>%
  distinct(bigram) %>%
  count(gender, bigram, sort = TRUE) %>%
  pivot_wider(names_from = gender, values_from = n)
## # A tibble: 14 × 3
## bigram F M
## <chr> <int> <int>
## 1 convergent evolution 1 NA
## 2 evolutionary constraint 1 NA
## 3 colors evolution NA 1
## 4 ecology evolution NA 1
## 5 evolutionary biology NA 1
## 6 evolutionary convergences NA 1
## 7 evolutionary ecology NA 1
## 8 evolutionary game NA 1
## 9 evolutionary innovation NA 1
## 10 evolutionary patterns NA 1
## 11 evolutionary radiation NA 1
## 12 evolutionary trajectory NA 1
## 13 guilds evolution NA 1
## 14 micro evolution NA 1
Quase não tem mulher que fala de evolução.
# Bigrams containing "forest", one column per gender.
# NOTE(review): distinct() runs before count(), so every n is 1 and the table
# shows presence/absence rather than frequency -- confirm this is intended.
bigrams_stop %>%
  group_by(gender) %>%
  filter(str_detect(bigram, "forest")) %>%
  distinct(bigram) %>%
  count(gender, bigram, sort = TRUE) %>%
  pivot_wider(names_from = gender, values_from = n)
## # A tibble: 30 × 3
## bigram F M
## <chr> <int> <int>
## 1 atlantic forest 1 1
## 2 forest bird 1 1
## 3 forest birds 1 NA
## 4 forest corridors 1 1
## 5 forest cover 1 NA
## 6 forest field 1 NA
## 7 forest forests 1 NA
## 8 forest landscape 1 NA
## 9 forest loss 1 NA
## 10 forest products 1 NA
## # … with 20 more rows
wordcloud bigram ecology
# Bigrams containing "ecolog"; "ecological" is folded into "ecology" before
# the bigram is rebuilt from its two words.
bieco <- bigrams_stop %>%
  group_by(gender) %>%
  filter(str_detect(bigram, "ecolog"))
bieco$word1[bieco$word1 == "ecological"] <- "ecology"
bieco$word2[bieco$word2 == "ecological"] <- "ecology"
bieco$bigram <- paste(bieco$word1, bieco$word2)
# `color` is spelled out instead of relying on partial matching of `col`.
# NOTE(review): par(new = TRUE) draws the second cloud without advancing to
# the next mfrow panel -- confirm that overlaying is intended.
par(mfrow = c(1, 2))
textplot_wordcloud(x = dfm(tokens(bieco$bigram[bieco$gender == "F"])),
                   min_count = 1, color = "#6D57CF")
par(new = TRUE)
textplot_wordcloud(x = dfm(tokens(bieco$bigram[bieco$gender == "M"])),
                   min_count = 1, color = "#FCA532")

# Same treatment for bigrams containing "evol"
bievo <- bigrams_stop %>%
  group_by(gender) %>%
  filter(str_detect(bigram, "evol"))
bievo$word1[bievo$word1 == "evolutionary"] <- "evolution"
bievo$word2[bievo$word2 == "evolutionary"] <- "evolution"
bievo$bigram <- paste(bievo$word1, bievo$word2)
par(mfrow = c(1, 2))
textplot_wordcloud(x = dfm(tokens(bievo$bigram[bievo$gender == "F"])),
                   min_count = 1, color = "#6D57CF", rotation = 0)
par(new = TRUE)
textplot_wordcloud(x = dfm(tokens(bievo$bigram[bievo$gender == "M"])),
                   min_count = 1, color = "#FCA532", rotation = 0)

# TF-IDF of bigrams
# tf-idf of each bigram, treating each gender as one document.
bigram_tfidf <- bigrams_stop %>%
  count(gender, bigram) %>%
  bind_tf_idf(bigram, gender, n)
# bigram_tfidf %>%
#   arrange(desc(tf_idf))

# Top bigrams per gender by tf-idf (ties are kept)
bigram_tfidf %>%
  group_by(gender) %>%
  slice_max(tf_idf, n = 3) %>%
  ungroup() %>%
  ggplot() +
  aes(x = tf_idf,
      y = fct_reorder(bigram, tf_idf),
      fill = gender) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ gender, scales = "free") +
  labs(x = "tf-idf", y = NULL) +
  theme_minimal()

# Network of bigrams that occur more than once, all speakers together
alice_graph <- bigrams_stop %>%
  count(word1, word2) %>% # we need the words separated for this graph
  filter(n > 1) %>%
  graph_from_data_frame()
set.seed(2021)
ggraph(alice_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name),
                 vjust = 1, hjust = 1)

# Network of the "ecolog" bigrams used by female speakers
alice_graph <- bieco %>%
  filter(gender == "F") %>%
  ungroup() %>%
  count(word1, word2) # we need the words separated for this graph
alice_graph$word1[alice_graph$word1 == "ecological"] <- "ecology"
alice_graph$word2[alice_graph$word2 == "ecological"] <- "ecology"
alice_graph <- alice_graph %>%
  graph_from_data_frame()
set.seed(2021)
ggraph(alice_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name),
                 vjust = 1, hjust = 0.5)

# Network of the "ecolog" bigrams used by male speakers
alice_graph <- bieco %>%
  filter(gender == "M") %>%
  ungroup() %>%
  count(word1, word2) # we need the words separated for this graph
alice_graph$word1[alice_graph$word1 == "ecological"] <- "ecology"
alice_graph$word2[alice_graph$word2 == "ecological"] <- "ecology"
alice_graph <- alice_graph %>%
  graph_from_data_frame()
set.seed(2022)
ggraph(alice_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name),
                 vjust = 1, hjust = 1 / 2)

# Network of bigrams used more than once within a gender.
# graph_from_data_frame() reads the first two columns as the edge list, so
# word1 and word2 must come first; with gender leading, the edges ran from
# "F"/"M" to word1. gender and n are kept as edge attributes.
alice_graph <- bigrams_stop %>%
  count(gender, word1, word2) %>%
  filter(n > 1) %>%
  dplyr::select(word1, word2, gender, n) %>%
  graph_from_data_frame()
set.seed(2021)
ggraph(alice_graph, layout = "fr") +
  geom_edge_link() +
  geom_node_point() +
  geom_node_text(aes(label = name),
                 vjust = 1, hjust = 1)

# We get a sense of which words occur together, but the graph could
# definitely look prettier and it's unclear which word occurs first: is it
# "rose tree" or "tree rose", for example?
# We'll create an object called "a" that saves an arrow shape:
a <- grid::arrow(type = "closed", length = unit(.15, "inches"))
# This way, we can indicate how the words in the bigrams are ordered.

# Nicer graph:
ggraph(alice_graph, layout = "fr") +
  # edge_alpha is fixed at 1; mapping it to n would make rare bigrams more
  # transparent
  geom_edge_link(aes(edge_alpha = 1),
                 show.legend = FALSE,
                 # arrows stop short of the node they point to
                 arrow = a, end_cap = circle(.03, "inches")) +
  geom_node_point(color = "#34013f", size = 3) + # larger, purple nodes
  geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
  theme_void() +
  # the stray double quote at the end of the title was removed
  labs(title = "Bigrams (two-word combinations)")

# Abstract + title
# Read the presentations table again and derive the year of each presentation.
data <- read.table("data/presentations_PPGE_2008-2019.csv", sep = ",",
                   header = TRUE, as.is = TRUE)
data$date <- dmy(data$date)
data$year <- year(data$date)
# skimr::skim(data)

# Excluding special events as round tables and discussions not related to a
# project or study presented by someone.
# Using abstracts in Portuguese
IDs <- c(154, 250, 211, 289)
data <- data %>%
  filter(!id %in% IDs) %>%
  filter(!is.na(abstract_original),
         abstract_language == "port")

table(data$gender)
##
## F M
## 86 101
Tidytext
# Join title and abstract into one text per presentation and tokenize it.
tit <- data %>%
  dplyr::select(id, gender, position_cat, audience_n,
                abstract_original, title_original) %>%
  mutate(text = paste(title_original, abstract_original))
text_tok <- tit %>%
  unnest_tokens(output = word, input = text)
# Portuguese stopword list
stop_w <- tibble(word = stopwords("pt"))
# Remove the stopwords from the corpus, plus a few extra function words.
# "\u00e9" is the word "é": the literal had been stored as the mojibake
# "Ă©", which can never match a token; the escape is encoding-independent.
text <- text_tok %>%
  anti_join(stop_w, by = "word") %>%
  filter(!word %in% c("\u00e9", "sobre", "ser"))

# Frequency of every word in the cleaned corpus
pala <- text %>%
  count(word)

# Most common words
text %>%
  count(word, sort = TRUE) %>%
  filter(n > 8) %>%
  kable()

| word | n |
|---|---|
| espécies | 224 |
| machos | 78 |
| diferentes | 75 |
| ĂĄreas | 70 |
| podem | 66 |
| padrÔes | 64 |
| diversidade | 61 |
| ecologia | 61 |
| estudo | 60 |
| estudos | 58 |
| paisagem | 57 |
| pode | 54 |
| espécie | 53 |
| evolução | 50 |
| habitat | 50 |
| resultados | 50 |
| comunidades | 49 |
| florestas | 49 |
| processos | 48 |
| grande | 47 |
| modelo | 47 |
| maior | 46 |
| dados | 45 |
| populaçÔes | 45 |
| brasil | 44 |
| ambientais | 43 |
| palestra | 43 |
| plantas | 43 |
| além | 42 |
| efeitos | 42 |
| estrutura | 41 |
| mudanças | 41 |
| uso | 41 |
| indivĂduos | 40 |
| interaçÔes | 40 |
| recursos | 40 |
| trabalho | 40 |
| paisagens | 39 |
| atlĂąntica | 38 |
| conservação | 38 |
| comportamento | 37 |
| durante | 37 |
| seleção | 37 |
| ainda | 36 |
| fatores | 36 |
| forma | 36 |
| abelhas | 35 |
| mata | 35 |
| naturais | 35 |
| distribuição | 34 |
| ecolĂłgicas | 33 |
| aves | 31 |
| bem | 31 |
| disso | 31 |
| extinção | 31 |
| importante | 31 |
| informaçÔes | 31 |
| modelos | 31 |
| nesta | 31 |
| animais | 30 |
| longo | 30 |
| natural | 30 |
| redes | 30 |
| dinĂąmica | 29 |
| fĂȘmeas | 29 |
| insetos | 29 |
| ĂĄrea | 28 |
| comunidade | 28 |
| entender | 28 |
| florestais | 28 |
| outros | 28 |
| papel | 28 |
| quais | 28 |
| relação | 28 |
| abordagem | 27 |
| cada | 27 |
| estratégias | 27 |
| fragmentos | 27 |
| hipĂłtese | 27 |
| paulo | 27 |
| risco | 27 |
| sistema | 27 |
| variação | 27 |
| ambientes | 26 |
| campo | 26 |
| dois | 26 |
| genética | 26 |
| mecanismos | 26 |
| objetivo | 26 |
| onde | 26 |
| parte | 26 |
| pesquisa | 26 |
| quanto | 26 |
| anĂĄlise | 25 |
| biodiversidade | 25 |
| caracterĂsticas | 25 |
| conhecimento | 25 |
| ecolĂłgicos | 25 |
| ecossistemas | 25 |
| importĂąncia | 25 |
| neste | 25 |
| regiĂŁo | 25 |
| alguns | 24 |
| efeito | 24 |
| processo | 24 |
| sendo | 24 |
| sexual | 24 |
| tempo | 24 |
| grupo | 23 |
| modelagem | 23 |
| presença | 23 |
| principais | 23 |
| qualidade | 23 |
| sistemas | 23 |
| anĂĄlises | 22 |
| anos | 22 |
| desenvolvimento | 22 |
| dessas | 22 |
| dispersĂŁo | 22 |
| diversificação | 22 |
| entanto | 22 |
| importantes | 22 |
| manejo | 22 |
| perda | 22 |
| produção | 22 |
| protegidas | 22 |
| sido | 22 |
| 1 | 21 |
| apesar | 21 |
| através | 21 |
| condiçÔes | 21 |
| deve | 21 |
| duas | 21 |
| locais | 21 |
| meio | 21 |
| população | 21 |
| vegetação | 21 |
| 2 | 20 |
| alta | 20 |
| ambiente | 20 |
| benefĂcios | 20 |
| biologia | 20 |
| ciĂȘncia | 20 |
| possuem | 20 |
| pouco | 20 |
| sobrevivĂȘncia | 20 |
| variaçÔes | 20 |
| acasalamento | 19 |
| ambiental | 19 |
| apresentar | 19 |
| conectividade | 19 |
| disponibilidade | 19 |
| diversos | 19 |
| elementos | 19 |
| grupos | 19 |
| muitas | 19 |
| padrĂŁo | 19 |
| principalmente | 19 |
| serviços | 19 |
| sociais | 19 |
| sucesso | 19 |
| tamanho | 19 |
| tanto | 19 |
| trĂȘs | 19 |
| assim | 18 |
| biomassa | 18 |
| campos | 18 |
| caracteres | 18 |
| cobertura | 18 |
| fauna | 18 |
| florestal | 18 |
| formigas | 18 |
| fragmentadas | 18 |
| interação | 18 |
| maioria | 18 |
| of | 18 |
| projeto | 18 |
| tĂȘm | 18 |
| utilizando | 18 |
| animal | 17 |
| ĂĄrvores | 17 |
| competição | 17 |
| desta | 17 |
| estado | 17 |
| evolutiva | 17 |
| nessa | 17 |
| novas | 17 |
| organismos | 17 |
| partir | 17 |
| possĂvel | 17 |
| restauração | 17 |
| tropicais | 17 |
| vida | 17 |
| algumas | 16 |
| apresentados | 16 |
| capacidade | 16 |
| cerrado | 16 |
| climĂĄticas | 16 |
| consequĂȘncias | 16 |
| custos | 16 |
| dentro | 16 |
| entretanto | 16 |
| estratégia | 16 |
| floresta | 16 |
| funcional | 16 |
| fundamental | 16 |
| histĂłria | 16 |
| medidas | 16 |
| menos | 16 |
| mutualismos | 16 |
| norte | 16 |
| porém | 16 |
| regiÔes | 16 |
| relacionados | 16 |
| tais | 16 |
| terra | 16 |
| tipo | 16 |
| abordagens | 15 |
| apresentarei | 15 |
| atividades | 15 |
| avaliar | 15 |
| clima | 15 |
| comportamentais | 15 |
| construção | 15 |
| cuidado | 15 |
| dessa | 15 |
| desses | 15 |
| deste | 15 |
| ecolĂłgica | 15 |
| escala | 15 |
| grandes | 15 |
| neotropicais | 15 |
| nĂvel | 15 |
| polinização | 15 |
| recentes | 15 |
| rio | 15 |
| ĂĄgua | 14 |
| apenas | 14 |
| apĂłs | 14 |
| ciĂȘncias | 14 |
| composição | 14 |
| enquanto | 14 |
| espacial | 14 |
| formação | 14 |
| mundo | 14 |
| nesse | 14 |
| populacional | 14 |
| serem | 14 |
| sob | 14 |
| trabalhos | 14 |
| vou | 14 |
| 3 | 13 |
| abundĂąncia | 13 |
| açÔes | 13 |
| apresentação | 13 |
| apresentam | 13 |
| aspectos | 13 |
| baixa | 13 |
| cerca | 13 |
| complexos | 13 |
| corais | 13 |
| corredores | 13 |
| defesa | 13 |
| desenvolvidos | 13 |
| devido | 13 |
| doutorado | 13 |
| ecolĂłgico | 13 |
| entendimento | 13 |
| escalas | 13 |
| especiação | 13 |
| evolutivos | 13 |
| exemplo | 13 |
| facilitação | 13 |
| ferramenta | 13 |
| impactos | 13 |
| in | 13 |
| influĂȘncia | 13 |
| local | 13 |
| matriz | 13 |
| menor | 13 |
| natureza | 13 |
| nĂșmero | 13 |
| polĂticas | 13 |
| presente | 13 |
| principal | 13 |
| quantidade | 13 |
| relaçÔes | 13 |
| reprodutivo | 13 |
| respostas | 13 |
| riqueza | 13 |
| sementes | 13 |
| soja | 13 |
| ter | 13 |
| variĂĄveis | 13 |
| vez | 13 |
| adaptaçÔes | 12 |
| alimentares | 12 |
| alunos | 12 |
| anfĂbios | 12 |
| anuros | 12 |
| apresenta | 12 |
| associação | 12 |
| associadas | 12 |
| aumentar | 12 |
| carbono | 12 |
| cientĂficos | 12 |
| comportamental | 12 |
| consumo | 12 |
| contra | 12 |
| desde | 12 |
| desse | 12 |
| dimorfismo | 12 |
| espaciais | 12 |
| evidĂȘncias | 12 |
| famĂlia | 12 |
| fenotĂpica | 12 |
| ferramentas | 12 |
| fragmentação | 12 |
| geogrĂĄfica | 12 |
| ilhas | 12 |
| literatura | 12 |
| ocorrĂȘncia | 12 |
| organização | 12 |
| paternal | 12 |
| perspectivas | 12 |
| teoria | 12 |
| todo | 12 |
| tradicionais | 12 |
| alimentação | 11 |
| amazĂŽnia | 11 |
| amplamente | 11 |
| atributos | 11 |
| atual | 11 |
| atualmente | 11 |
| bandos | 11 |
| biolĂłgica | 11 |
| caso | 11 |
| cientĂfico | 11 |
| complexo | 11 |
| contexto | 11 |
| desconexĂŁo | 11 |
| dieta | 11 |
| disciplina | 11 |
| diversas | 11 |
| ecossistĂȘmicos | 11 |
| eficiĂȘncia | 11 |
| estudar | 11 |
| evolutivas | 11 |
| expansĂŁo | 11 |
| fluxo | 11 |
| humanos | 11 |
| indicam | 11 |
| manguezais | 11 |
| mistos | 11 |
| porque | 11 |
| prĂĄticas | 11 |
| presentes | 11 |
| probabilidade | 11 |
| prole | 11 |
| reprodução | 11 |
| resposta | 11 |
| territĂłrio | 11 |
| usando | 11 |
| valor | 11 |
| afetar | 10 |
| ano | 10 |
| biolĂłgicas | 10 |
| conjunto | 10 |
| cĂłpulas | 10 |
| décadas | 10 |
| declĂnio | 10 |
| diplĂłides | 10 |
| discutir | 10 |
| distĂąncia | 10 |
| estar | 10 |
| exemplos | 10 |
| florais | 10 |
| humana | 10 |
| intra | 10 |
| isolamento | 10 |
| linhagens | 10 |
| mamĂferos | 10 |
| maneira | 10 |
| manter | 10 |
| matrizes | 10 |
| mestrado | 10 |
| mostrar | 10 |
| nativas | 10 |
| neotropical | 10 |
| ocorre | 10 |
| peixes | 10 |
| planta | 10 |
| portanto | 10 |
| possĂveis | 10 |
| problemas | 10 |
| projetos | 10 |
| questÔes | 10 |
| similares | 10 |
| solo | 10 |
| sul | 10 |
| sustentĂĄvel | 10 |
| unidades | 10 |
| visĂŁo | 10 |
| acesso | 9 |
| alimento | 9 |
| and | 9 |
| aplicaçÔes | 9 |
| associados | 9 |
| avaliação | 9 |
| balanço | 9 |
| capazes | 9 |
| central | 9 |
| cientĂfica | 9 |
| corpo | 9 |
| curso | 9 |
| destas | 9 |
| embora | 9 |
| ensino | 9 |
| frutos | 9 |
| funcionamento | 9 |
| gĂȘnico | 9 |
| hipĂłteses | 9 |
| hoje | 9 |
| i.e | 9 |
| identificar | 9 |
| ilha | 9 |
| impacto | 9 |
| implicaçÔes | 9 |
| inter | 9 |
| irei | 9 |
| modo | 9 |
| ninhos | 9 |
| nordeste | 9 |
| ovos | 9 |
| parĂąmetros | 9 |
| permite | 9 |
| pessoas | 9 |
| planejamento | 9 |
| polinizadores | 9 |
| pĂłs | 9 |
| poucas | 9 |
| predação | 9 |
| predadores | 9 |
| realizados | 9 |
| recentemente | 9 |
| representam | 9 |
| reprodutiva | 9 |
| revista | 9 |
| serviço | 9 |
| social | 9 |
| sociedade | 9 |
| sĂłcio | 9 |
| sub | 9 |
| sugerem | 9 |
| temperatura | 9 |
| testamos | 9 |
| testar | 9 |
| tipos | 9 |
| Ășltimas | 9 |
| utilizadas | 9 |
# Relative frequency of each word within each gender, spread to one row per
# word with columns proportion_F, proportion_M, n_F and n_M.
props <- text %>%
  count(gender, word) %>%
  # filter(n > 1) %>% # dropping words used only once
  group_by(gender) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(names_from = gender, values_from = c(proportion, n))

library(scales)
# Words on the dashed line are used equally by both genders. The colour
# mapping must live inside aes(); passed to ggplot() directly it is ignored
# and scale_color_gradient() has nothing to act on.
ggplot(props, aes(x = proportion_M, y = proportion_F,
                  color = abs(proportion_F - proportion_M))) +
  geom_abline(color = "gray40", lty = 2) +
  geom_jitter(alpha = 0.1, size = 2.5, width = 0.3, height = 0.3) +
  geom_text(aes(label = word), check_overlap = TRUE, vjust = 1.5) +
  scale_x_log10(labels = percent_format(), limits = c(0.0005, 0.03)) +
  scale_y_log10(labels = percent_format(), limits = c(0.0005, 0.03)) +
  scale_color_gradient(low = "blue", high = "red")

# Same comparison on the untransformed proportion scale
ggplot(props, aes(x = proportion_M, y = proportion_F)) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "gray40", lty = 2) +
  geom_text(aes(label = word), check_overlap = TRUE)

# Same comparison using raw counts
ggplot(props, aes(x = n_M, y = n_F)) +
  geom_point(alpha = 0.1) +
  geom_abline(color = "gray40", lty = 2) +
  geom_text(aes(label = word), check_overlap = TRUE) +
  xlim(-1, 30) + ylim(-1, 30)

# Words used more than 8 times overall
seleciona <- pala %>%
  arrange(desc(n)) %>%
  filter(n > 8)
# Per-gender proportions restricted to the frequent words in `seleciona`.
props <- text %>%
  filter(word %in% seleciona$word) %>%
  count(gender, word) %>%
  group_by(gender) %>%
  mutate(proportion = n / sum(n)) %>%
  ungroup() %>%
  pivot_wider(names_from = gender, values_from = c(proportion, n))

# Order the words by total count and mirror the female proportions to the
# negative side so both genders share one axis.
test <- props %>%
  arrange(desc(proportion_F), desc(proportion_M)) %>%
  mutate(ntot = n_F + n_M) %>%
  mutate(word = fct_reorder(word, ntot, max))
test$proportion_F <- test$proportion_F * -1
# Select by name rather than by position so the result does not depend on the
# column order produced by pivot_wider().
test <- test %>%
  dplyr::select(word, proportion_F, proportion_M) %>%
  pivot_longer(c(proportion_F, proportion_M),
               names_to = "gender", values_to = "proportion")

test %>%
  ggplot(aes(x = proportion, y = word, fill = gender)) +
  geom_col() + ylab("") + xlab("Proportion") +
  scale_fill_manual(name = "gender", values = c("#6D57CF", "#FCA532"),
                    labels = c("F", "M"))
# Saved under its own name: this call previously reused
# "figures/title_wordFrequency.jpeg" and overwrote the title-only figure.
ggsave("figures/abstract_wordFrequency.jpeg", units = "in", width = 7,
       height = 7, dpi = 300)

# Word cloud
# Word cloud of all title and abstract words
textplot_wordcloud(x = dfm(tokens(text$word)))

# Word clouds per gender (F purple, M orange).
# `color` is spelled out instead of relying on partial matching of `col`.
# NOTE(review): par(new = TRUE) draws the second cloud without advancing to
# the next mfrow panel -- confirm that overlaying is intended.
par(mfrow = c(1, 2))
textplot_wordcloud(x = dfm(tokens(text$word[text$gender == "F"])),
                   color = "#6D57CF")
par(new = TRUE)
textplot_wordcloud(x = dfm(tokens(text$word[text$gender == "M"])),
                   color = "#FCA532")

# TF-IDF
# tf-idf of each word, treating each gender as one document.
text_id <- text %>%
  count(gender, word) %>%
  bind_tf_idf(word, gender, n) %>%
  arrange(desc(tf_idf))
# text_id
text_id$word <- as.factor(text_id$word)

# Top words per gender by tf-idf (ties are kept). slice_max() replaces the
# superseded top_n(); the axis labels now match the axes (tf-idf is on x).
text_id %>%
  group_by(gender) %>%
  slice_max(tf_idf, n = 5) %>%
  ungroup() %>%
  ggplot(aes(x = tf_idf, y = reorder(word, tf_idf), fill = gender)) +
  geom_col(show.legend = FALSE) +
  labs(x = "tf-idf", y = NULL) +
  facet_wrap(~gender, scales = "free") +
  theme_minimal()